*Counterfactual between-class inequality levels

*Tim Goedemé, 28/10/2020
*This version 2: interaction terms with social class added; Number of cases therefore reduced.

/*

Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License. 
This file can be changed and re-shared for non-commercial use, as long as our original work 
is recognised and the revised work is made available under the same conditions.

When using this do-file, please cite as:
Goedemé, T., Nolan, B., Paskov, M., & Weisstanner, D. (2021). 
Occupational Social Class and Earnings Inequality in Europe: A Comparative Assessment. 
In: Social Indicators Research. DOI: https://doi.org/10.1007/s11205-021-02746-z; https://timgoedeme.com/tools/esec-in-eu-silc/

This file makes use of the subprop command which I wrote, but is not available 
online. Please get in touch if you would like to make use of this command.

*/

*Globals
* place1:    folder from which the country loops read the input data; files are addressed as
*            place1, country code, year, then c + country code + year + _addvars2.dta
* place2:    folder that receives the per-country result datasets and the estout text files
* countries: two-letter codes looped over in section A; this first list still contains DK,
*            the list is redefined without DK before the estimation sections
* NOTE(review): place1 and place2 are machine-specific absolute Windows paths and need to be
*               adapted before the file can run elsewhere.

global place1 A:\Stata files\Oxfiles\EU-SILC\Cross\2020-04\
global place2 C:\Users\tgoedeme\Dropbox\Publicaties\2021\Between-class inequality\Estimations\Between-MLD Gender\
global countries AT BE BG CH CY CZ DE DK EE EL ES FI FR HR HU IE IT LT LU LV MT NL NO PL PT RO RS SE SI UK


*Estimating the counterfactual between-class MLD; only SILC 2018 - 9 classes
*****************************************************************************

*A. Assess incidence of class by gender and missings in earnings
*****************************************************************

* Stack the 2018 dataset of every country in the global countries into one dataset in memory.
* Each country code is echoed to the log; a country whose file is not on disk is skipped and
* flagged, so that gaps in the pooled tables below can be traced back.
clear
foreach ctry of global countries {
	di "`ctry'", _continue
	quietly {
		local year 2018
		* confirm file tests that the dataset is on disk. The earlier confirm existence only
		* tested that the path string was non-empty, so the guard never skipped anything and
		* a missing file made append abort the whole run.
		cap confirm file "${place1}\`ctry'\\`year'\c`ctry'`year'_addvars2.dta"
		if _rc==0 {
			append using "${place1}\`ctry'\\`year'\c`ctry'`year'_addvars2.dta"
		}
		else {
			noi di " (file not found, skipped)", _continue
		}
	}
}

* Descriptive checks on the pooled data: class incidence by gender, before and after
* restricting the subpopulation flag sub to usable, strictly positive earnings.
drop age hydisp eqs hystd thresh60 arop60 actage active

* Weighted row percentages of class (esec08) by country, one table per value of sex
forvalues g = 0/1 {
	tabulate country esec08 [iweight=weight] if sub==1 & sex==`g', nofreq row
}

* Take out of the subpopulation: cases with missers2 different from zero, and cases with
* zero or negative earnings on either earnings variable
replace sub = 0 if missers2!=0
replace sub = 0 if earns1<=0 | earns2<=0

* Unweighted cell counts that remain after the restriction, again one table per value of sex
forvalues g = 0/1 {
	tabulate country esec08 if sub==1 & sex==`g'
}

inspect earns2 if sub==1

* 1 when earns2 differs from the system missing value, 0 otherwise
generate earnshere = !(earns2==.)

***==> Drop Denmark from list:
global countries AT BE BG CH CY CZ DE EE EL ES FI FR HR HU IE IT LT LU LV MT NL NO PL PT RO RS SE SI UK

*B. Females
************
* For every country in the list above:
*   1. load the 2018 file, declare the survey design and restrict the subpopulation flag sub
*      to cases with usable positive earnings and sex different from 0;
*   2. drop small classes, country by country;
*   3. for earns1 and earns2, fit a survey regression on class and controls, once with and
*      once without class-by-control interactions, take class margins at the means and
*      combine them with nlcom into ln(grand margin) minus the share-weighted sum of
*      ln(class margins);
*   4. save the estimate, SE, df and confidence bounds per country and export the four
*      regressions with estout.
* subprop3 is a user-written command that is not part of this file (presumably the
* unpublished subprop command mentioned in the file header). From the way its output is
* used below, the matrix it returns appears to hold the class code in column 1 and the
* class share in percent in column 2 - TODO confirm.

* NOTE(review): permanent changes the setting for all future Stata sessions of this user.
set matsize 5000, permanent

foreach ctry of global countries {
	di "`ctry'", _continue
	* leftover: no matrix called results is created in this section
	cap mat drop results	
	quietly {
		local year 2018
		cap use "${place1}\`ctry'\\`year'\c`ctry'`year'_addvars2.dta", clear
		* keep the return code of use so that it can be reported if loading failed
		local loadrc = _rc
		if `loadrc'==0 {
			drop age hydisp eqs hystd thresh60 arop60 actage active
			svyset psu1 [pw=weight], strata(strata1)
			
			replace sub = 0 if missers2!=0
			replace sub = 0 if earns1<=0 | earns2<=0
			
			* keep only cases with sex different from 0 in the subpopulation
			replace sub = 0 if sex==0
			
			gen class=esec08
			
			*Drop classes that account for less than 1.5% of weighted population or with fewer than 30 observations
			if "`ctry'"=="NO" replace class = . if (esec08==4)
			if "`ctry'"=="NO" replace sub = 0 if (esec08==4)
			
			* NOTE(review): the condition is negated here, so class 5 is kept only for
			* AT EL IT PL RO RS and dropped everywhere else, whereas the males section
			* lists the countries in which class 5 is dropped. Confirm this is intended.
			if regexm("AT EL IT PL RO RS", "`ctry'")!=1 replace class = . if esec08==5 // (class 5 accounts for less than 1.5% of population in paid employment)
			if regexm("AT EL IT PL RO RS", "`ctry'")!=1 replace sub = 0 if esec08==5 
			
			
			if "`ctry'"=="RO" replace class = . if (esec08==6)
			if "`ctry'"=="RO" replace sub = 0 if (esec08==6)
			
			if regexm("BE CY LU MT NL NO SE UK", "`ctry'")==1 replace class=. if esec08==8
			if regexm("BE CY LU MT NL NO SE UK", "`ctry'")==1 replace sub=0 if esec08==8
			
			
			drop esec08
			subprop3 class, subpop(sub) na(test)
			* Build the second term of the nlcom expression: minus share times ln(class margin),
			* one piece per row of the matrix returned by subprop3
			local rows=rowsof(test)
			local secondterm
			forvalues x=1/`rows' {
				local c=el(test, `x', 1)
				local s=el(test, `x', 2)/100
				local secondterm `secondterm' -`s'*ln(_b[`c'.class])
				
			}
			* Control variables; later lines overrule earlier ones for the countries they name
			if regexm("BG HU PL RO", "`ctry'")==1 global vars c.fyfte i.education i.health c.career c.nchilds c.nadults c.ndepadults
			else global vars c.fyfte i.immigrant i.education i.health c.career c.nchilds c.nadults c.ndepadults
			if "`ctry'"=="SI" | "`ctry'"=="IE" | "`ctry'"=="LT" | "`ctry'"=="LV" | "`ctry'"=="MT" | "`ctry'"=="RS" global vars c.fyfte i.immigrant c.education i.health c.career c.nchilds c.nadults c.ndepadults
			if "`ctry'"=="HU" | "`ctry'"=="RO" global vars c.fyfte c.education i.health c.career c.nchilds c.nadults c.ndepadults
			*BG HU PL and RO: no immigrant variable due to too low prevalence; SI/HU: education as continuous variable as low educated does not occur in high salariat/ In LT it does not occur in class 6; LV: not in salariat and class 4; MT: no high educated in class 9
			*IE, RS and RO also get continuous education in the lines above; the reason is not recorded here
			
			cap mat drop results_`ctry' 
			* tail probability used for the t-based bounds: 0.025 in each tail, i.e. 95% intervals
			local conflevel 0.025
			forvalues earnings = 1/2 {
				
								
				*1. Version 1: with interaction terms
				*************************************
				noi di "`earnings':1", _continue

				* interact class with every control variable
				global interactions
				foreach var of global vars {
					global interactions ${interactions} i.class#`var' 
				}
				
				svy, subpop(sub): reg earns`earnings' i.class ${vars} ${interactions}
				estimates store `ctry'_`earnings'_1
				
				margins i.class, atmeans post subpop(if sub==1) grand // by adding 'grand' the counterfactual average is computed
				
				* _b[_cons] is the grand margin posted by margins
				nlcom ln(_b[_cons]) `secondterm'
				
				local estimate=el(r(b), 1, 1)
				local SE=(el(r(V),1,1)^0.5)
				local DF=e(df_r)
				local LB=`estimate'+ (-1)*(invttail(`DF', `conflevel')*`SE')
				local UB=`estimate'+ (invttail(`DF', `conflevel')*`SE')
				mat def results_`ctry' = nullmat(results_`ctry' ) \ `earnings', 1, `estimate', `SE', `DF', `LB', `UB'

				*2. Version 2: without interaction terms
				****************************************
				noi di "`earnings':2", _continue
								
				svy, subpop(sub): reg earns`earnings' i.class ${vars}
				estimates store `ctry'_`earnings'_2
				
				margins i.class, atmeans post subpop(if sub==1) grand // by adding 'grand' the counterfactual average is computed
				
				nlcom ln(_b[_cons]) `secondterm'
				
				local estimate=el(r(b), 1, 1)
				local SE=(el(r(V),1,1)^0.5)
				local DF=e(df_r)
				local LB=`estimate'+ (-1)*(invttail(`DF', `conflevel')*`SE')
				local UB=`estimate'+ (invttail(`DF', `conflevel')*`SE')
				mat def results_`ctry' = nullmat(results_`ctry') \ `earnings', 2, `estimate', `SE', `DF', `LB', `UB'

			
			}
			
			
			noi di "."
			
			*Write results to files
			***********************
			* Kept inside the guard: written outside it, a failed load would save a results
			* matrix and stored estimates left over from an earlier run in the same session.
			mat colnames results_`ctry' = earnings interact est se df lb ub
			mat li results_`ctry'
			clear
			svmat results_`ctry', names(matcol)
			gen country="`ctry'"
					
			save "${place2}\MLD_decomp2bis_`ctry'_esec9_F.dta", replace // esec08
			
			estout `ctry'_1_1 `ctry'_1_2 `ctry'_2_1 `ctry'_2_2 using "${place2}\Regs_23_Version2bis_`ctry'_coefs_F.txt", cells("b se p") stats(df_r r2) replace
		}
		else {
			noi di as error " could not load the `year' file (rc=`loadrc'); no results written"
		}
	}
}

*Put all files together in a single file
*****************************************
* Step 1 strips the results_XX prefix that svmat put on the variable names, so that all
* country files share the same variable names. Step 2 stacks the country files.
global countries AT BE BG CH CY CZ DE EE EL ES FI FR HR HU IE IT LT LU LV MT NL NO PL PT RO RS SE SI UK
foreach ctry of global countries {
	use "${place2}\MLD_decomp2bis_`ctry'_esec9_F.dta", clear
	local vars earnings interact est se df lb ub
	foreach var of local vars {
		rename results_`ctry'`var' `var'
	}
	save "${place2}\MLD_decomp2bis_`ctry'_esec9_F.dta", replace
}

* Start from an empty dataset: the loop above leaves the last country in memory, and
* appending every country on top of it would include that country twice.
clear
foreach ctry of global countries {
	append using "${place2}\MLD_decomp2bis_`ctry'_esec9_F.dta"
}
save "${place2}\MLD_decomp2bis_ALL_esec9_F.dta", replace

*Extract R2 and degrees of freedom
***********************************
* Reads back the estout text file of every country and collects, per model, the two
* statistics stored in the rows labelled df_r and r2. Columns v2, v5, v8 and v11 are the
* first column of the four exported models (earnings 1 or 2, version 1 or 2).
* results1 receives the smaller and results2 the larger of the two statistics; with R2 at
* most 1 and df_r presumably well above 1, results1 is the R2 and results2 the degrees of
* freedom. One row per country, one column per model.

capture matrix drop results1
capture matrix drop results2
foreach ctry of global countries {
	clear
	insheet using "${place2}\Regs_23_Version2bis_`ctry'_coefs_F.txt"
	* copy the statistics rows of each model into n11 n12 n21 n22
	local col 2
	foreach m in 11 12 21 22 {
		generate n`m' = v`col' if v1=="df_r" | v1=="r2"
		local col = `col' + 3
	}
	destring n*, replace
	
	* one row vector per country, filled model by model
	capture matrix drop tempmat1
	capture matrix drop tempmat2
	foreach m in 11 12 21 22 {
		summarize n`m'
		local lowest = r(min)
		local highest = r(max)
		matrix tempmat1 = nullmat(tempmat1), `lowest'
		matrix tempmat2 = nullmat(tempmat2), `highest'
	}
	matrix results1 = nullmat(results1) \ tempmat1
	matrix results2 = nullmat(results2) \ tempmat2
}
* label and list both matrices
foreach k in 1 2 {
	matrix rownames results`k' = ${countries}
	matrix colnames results`k' = 1_1 1_2 2_1 2_2
	matrix list results`k'

}

*C. Males
************
* Same procedure as for females, applied to cases with sex different from 1:
*   1. load the 2018 file, declare the survey design and restrict the subpopulation flag sub;
*   2. drop small classes, country by country;
*   3. for earns1 and earns2, fit a survey regression on class and controls, once with and
*      once without class-by-control interactions, take class margins at the means and
*      combine them with nlcom into ln(grand margin) minus the share-weighted sum of
*      ln(class margins);
*   4. save the estimate, SE, df and confidence bounds per country and export the four
*      regressions with estout.
* subprop3 is a user-written command that is not part of this file (presumably the
* unpublished subprop command mentioned in the file header). From the way its output is
* used below, the matrix it returns appears to hold the class code in column 1 and the
* class share in percent in column 2 - TODO confirm.
***==> Drop Denmark from list:
global countries AT BE BG CH CY CZ DE EE EL ES FI FR HR HU IE IT LT LU LV MT NL NO PL PT RO RS SE SI UK

* NOTE(review): permanent changes the setting for all future Stata sessions of this user.
set matsize 5000, permanent
foreach ctry of global countries {
	di "`ctry'", _continue
	* leftover: no matrix called results is created in this section
	cap mat drop results	
	quietly {
		local year 2018
		cap use "${place1}\`ctry'\\`year'\c`ctry'`year'_addvars2.dta", clear
		* keep the return code of use so that it can be reported if loading failed
		local loadrc = _rc
		if `loadrc'==0 {
			drop age hydisp eqs hystd thresh60 arop60 actage active
			svyset psu1 [pw=weight], strata(strata1)
			
			replace sub = 0 if missers2!=0
			replace sub = 0 if earns1<=0 | earns2<=0
			
			* keep only cases with sex different from 1 in the subpopulation
			replace sub = 0 if sex==1
			
			gen class=esec08
			
			*Drop classes that account for less than 1.5% of weighted population or with fewer than 30 observations
			if "`ctry'"=="NO" replace class = . if (esec08==4)
			if "`ctry'"=="NO" replace sub = 0 if (esec08==4)
			
			* DK appears in the list below but is no longer part of the country loop
			if regexm("BE CH CZ DE DK EE LU MT NL NO PT SE", "`ctry'")==1 replace class = . if esec08==5 // (class 5 accounts for less than 1.5% of population in paid employment)
			if regexm("BE CH CZ DE DK EE LU MT NL NO PT SE", "`ctry'")==1 replace sub = 0 if esec08==5 

			if regexm("BE CY LU MT NL NO SE UK", "`ctry'")==1 replace class=. if esec08==8
			if regexm("BE CY LU MT NL NO SE UK", "`ctry'")==1 replace sub=0 if esec08==8
			
			* fixed seed, reset for every country, so that the random noise added below is reproducible
			set seed 10542
			if "`ctry'"=="RO" replace fyfte = fyfte+runiform(-0.05, 0.05) if class==2 | class==6 // introduce some variance in fyfte for these classes
			if "`ctry'"=="SE" replace fyfte = fyfte+runiform(-0.05, 0.05) if class==6 // introduce some variance in fyfte for these classes
			drop esec08
			subprop3 class, subpop(sub) na(test)
			* Build the second term of the nlcom expression: minus share times ln(class margin),
			* one piece per row of the matrix returned by subprop3
			local rows=rowsof(test)
			local secondterm
			forvalues x=1/`rows' {
				local c=el(test, `x', 1)
				local s=el(test, `x', 2)/100
				local secondterm `secondterm' -`s'*ln(_b[`c'.class])
				
			}
			* Control variables; later lines overrule earlier ones for the countries they name
			if regexm("BG FI FR HU PL RO", "`ctry'")==1 global vars c.fyfte i.education i.health c.career c.nchilds c.nadults c.ndepadults
			else global vars c.fyfte i.immigrant i.education i.health c.career c.nchilds c.nadults c.ndepadults
			if "`ctry'"=="LT" global vars c.fyfte i.immigrant c.education i.health c.career c.nchilds c.nadults c.ndepadults
			if "`ctry'"=="SI" global vars c.fyfte c.education i.health c.career c.nchilds c.nadults c.ndepadults
			*BG HU PL and RO: no immigrant variable due to too low prevalence, in FI/FR missing for class 5; SI: education as continuous variable as low educated does not occur in high salariat
			*LT also gets continuous education and SI also loses the immigrant variable in the lines above; the reason is not recorded here
			
			cap mat drop results_`ctry' 
			* tail probability used for the t-based bounds: 0.025 in each tail, i.e. 95% intervals
			local conflevel 0.025
			forvalues earnings = 1/2 {
				
								
				*1. Version 1: with interaction terms
				*************************************
				noi di "`earnings':1", _continue

				* interact class with every control variable
				global interactions
				foreach var of global vars {
					global interactions ${interactions} i.class#`var' 
				}
				
				svy, subpop(sub): reg earns`earnings' i.class ${vars} ${interactions}
				estimates store `ctry'_`earnings'_1
				
				margins i.class, atmeans post subpop(if sub==1) grand // by adding 'grand' the counterfactual average is computed
				
				* _b[_cons] is the grand margin posted by margins
				nlcom ln(_b[_cons]) `secondterm'
				
				local estimate=el(r(b), 1, 1)
				local SE=(el(r(V),1,1)^0.5)
				local DF=e(df_r)
				local LB=`estimate'+ (-1)*(invttail(`DF', `conflevel')*`SE')
				local UB=`estimate'+ (invttail(`DF', `conflevel')*`SE')
				mat def results_`ctry' = nullmat(results_`ctry' ) \ `earnings', 1, `estimate', `SE', `DF', `LB', `UB'

				*2. Version 2: without interaction terms
				****************************************
				noi di "`earnings':2", _continue
								
				svy, subpop(sub): reg earns`earnings' i.class ${vars}
				estimates store `ctry'_`earnings'_2
				
				margins i.class, atmeans post subpop(if sub==1) grand // by adding 'grand' the counterfactual average is computed
				
				nlcom ln(_b[_cons]) `secondterm'
				
				local estimate=el(r(b), 1, 1)
				local SE=(el(r(V),1,1)^0.5)
				local DF=e(df_r)
				local LB=`estimate'+ (-1)*(invttail(`DF', `conflevel')*`SE')
				local UB=`estimate'+ (invttail(`DF', `conflevel')*`SE')
				mat def results_`ctry' = nullmat(results_`ctry') \ `earnings', 2, `estimate', `SE', `DF', `LB', `UB'

			
			}
			
			
			noi di "."
			
			*Write results to files
			***********************
			* Kept inside the guard: written outside it, a failed load would save a results
			* matrix and stored estimates left over from an earlier run in the same session.
			mat colnames results_`ctry' = earnings interact est se df lb ub
			mat li results_`ctry'
			clear
			svmat results_`ctry', names(matcol)
			gen country="`ctry'"
					
			save "${place2}\MLD_decomp2bis_`ctry'_esec9_M.dta", replace // esec08
			
			estout `ctry'_1_1 `ctry'_1_2 `ctry'_2_1 `ctry'_2_2 using "${place2}\Regs_23_Version2bis_`ctry'_coefs_M.txt", cells("b se p") stats(df_r r2) replace
		}
		else {
			noi di as error " could not load the `year' file (rc=`loadrc'); no results written"
		}
	}
}

*Put all files together in a single file
*****************************************
* Step 1 strips the results_XX prefix that svmat put on the variable names, so that all
* country files share the same variable names. Step 2 stacks the country files.
global countries AT BE BG CH CY CZ DE EE EL ES FI FR HR HU IE IT LT LU LV MT NL NO PL PT RO RS SE SI UK
foreach ctry of global countries {
	use "${place2}\MLD_decomp2bis_`ctry'_esec9_M.dta", clear
	local vars earnings interact est se df lb ub
	foreach var of local vars {
		rename results_`ctry'`var' `var'
	}
	save "${place2}\MLD_decomp2bis_`ctry'_esec9_M.dta", replace
}

* Start from an empty dataset: the loop above leaves the last country in memory, and
* appending every country on top of it would include that country twice.
clear
foreach ctry of global countries {
	append using "${place2}\MLD_decomp2bis_`ctry'_esec9_M.dta"
}
save "${place2}\MLD_decomp2bis_ALL_esec9_M.dta", replace

*Extract R2 and degrees of freedom
***********************************
* Reads back the estout text file of every country and collects, per model, the two
* statistics stored in the rows labelled df_r and r2. Columns v2, v5, v8 and v11 are the
* first column of the four exported models (earnings 1 or 2, version 1 or 2).
* results1 receives the smaller and results2 the larger of the two statistics; with R2 at
* most 1 and df_r presumably well above 1, results1 is the R2 and results2 the degrees of
* freedom. One row per country, one column per model.

capture matrix drop results1
capture matrix drop results2
foreach ctry of global countries {
	clear
	insheet using "${place2}\Regs_23_Version2bis_`ctry'_coefs_M.txt"
	* copy the statistics rows of each model into n11 n12 n21 n22
	local col 2
	foreach m in 11 12 21 22 {
		generate n`m' = v`col' if v1=="df_r" | v1=="r2"
		local col = `col' + 3
	}
	destring n*, replace
	
	* one row vector per country, filled model by model
	capture matrix drop tempmat1
	capture matrix drop tempmat2
	foreach m in 11 12 21 22 {
		summarize n`m'
		local lowest = r(min)
		local highest = r(max)
		matrix tempmat1 = nullmat(tempmat1), `lowest'
		matrix tempmat2 = nullmat(tempmat2), `highest'
	}
	matrix results1 = nullmat(results1) \ tempmat1
	matrix results2 = nullmat(results2) \ tempmat2
}
* label and list both matrices
foreach k in 1 2 {
	matrix rownames results`k' = ${countries}
	matrix colnames results`k' = 1_1 1_2 2_1 2_2
	matrix list results`k'

}

